# Remove every visible (non-dot-prefixed) object from the calling environment
# before the analysis starts.
# NOTE(review): this wipes the workspace of anyone who source()s the file and
# does not unload packages or reset options; consider relying on a fresh R
# session instead.
rm(list=ls())
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stringr)
library(lubridate)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.3.3
library(ggthemes)
# ---- Load raw CSV inputs ----------------------------------------------------
# Files read with `skip = 4` have their header row on line 5; the four lines
# before it are discarded. Every other file is read from its first line
# (read.csv's default of skip = 0).
data_FemaleLFPR <- read.csv(file = "../data/data_FemaleLFPR.csv", skip = 4)
data_LFPR <- read.csv(file = "../data/data_LFPR.csv", skip = 4)
data_PropFemale <- read.csv(
  file = "../data/data_Population%Female.csv",
  skip = 4
)
data_PopulationAgeStructure <- read.csv(
  file = "../data/data_PopulationAgeStructure.csv"
)
data_Population <- read.csv(file = "../data/data_Population.csv", skip = 4)
data_FertilityRate <- read.csv(
  file = "../data/data_FertilityRate.csv",
  skip = 4
)
data_TertiaryEducation <- read.csv(
  file = "../data/data_TertiaryEducation.csv",
  skip = 4
)
data_GDPPerCapita <- read.csv(file = "../data/data_GDPPerCapita.csv", skip = 4)
data_Unemployment <- read.csv(file = "../data/data_Unemployment.csv", skip = 4)
data_GiniCoefficient <- read.csv(
  file = "../data/data_GiniCoefficient.csv",
  skip = 4
)
data_Agriculture <- read.csv(file = "../data/data_Agriculture.csv", skip = 4)
data_Manufacturing <- read.csv(
  file = "../data/data_Manufacturing.csv",
  skip = 4
)
data_Industry <- read.csv(file = "../data/data_Industry.csv", skip = 4)
data_Services <- read.csv(file = "../data/data_Services.csv", skip = 4)
data_ServicesEmployment <- read.csv(
  file = "../data/data_ServiceEmployment.csv",
  skip = 4
)
data_ExpenditureonFamily <- read.csv(
  file = "../data/data_GDP%ExpenditureonFamily.csv"
)
data_ExpenditureonIncapacity <- read.csv(
  file = "../data/data_GDP%ExpenditureonIncapacity.csv"
)
data_WelfareCoverage <- read.csv(file = "../data/data_Population%Covered.csv")
data_GenderPayGap <- read.csv(file = "../data/data_GenderPayGap.csv")
data_MaternityLeave <- read.csv(file = "../data/data_MaternityLeave.csv")
data_PaternityLeave <- read.csv(file = "../data/data_PaternityLeave.csv")
data_GenderEquality <- read.csv(file = "../data/data_GenderEquality.csv")
First, list the non-country entries (regional and income-group aggregates) in the World Bank data so they can be filtered out later:
# Country.Name values that are not individual countries. clean_WB() drops
# every row whose Country.Name appears in this vector.
nonCountryDatapointsa <- c(
  "Africa Eastern and Southern",
  "Africa Western and Central",
  "Arab World",
  "Caribbean small states",
  "Central Europe and the Baltics",
  "Early-demographic dividend",
  "East Asia & Pacific",
  "East Asia & Pacific (IDA & IBRD countries)",
  "East Asia & Pacific (excluding high income)",
  "Euro area",
  "Europe & Central Asia",
  "Europe & Central Asia (IDA & IBRD countries)",
  "Europe & Central Asia (excluding high income)",
  "European Union",
  "Fragile and conflict affected situations",
  "Heavily indebted poor countries (HIPC)",
  "High income",
  "IBRD only",
  "IDA & IBRD total",
  "IDA blend",
  "IDA only",
  "IDA total",
  "Late-demographic dividend",
  "Latin America & Caribbean",
  "Latin America & Caribbean (excluding high income)",
  "Latin America & Caribbean (excluding high income) LAC",
  "Latin America & the Caribbean (IDA & IBRD countries)",
  "Latin America & the Caribbean (IDA & IBRD countries) TLA",
  "Least developed countries: UN classification",
  "Low & middle income",
  "Low income",
  "Lower middle income",
  "Middle East & North Africa",
  "Middle East & North Africa (IDA & IBRD countries)",
  "Middle East & North Africa (excluding high income)",
  "Middle income",
  "North America",
  "Not classified",
  "OECD members",
  "Other small states",
  "Pacific island small states",
  "Post-demographic dividend",
  "Pre-demographic dividend",
  "Small states",
  "South Asia",
  "South Asia (IDA & IBRD)",
  "Sub-Saharan Africa",
  "Sub-Saharan Africa (IDA & IBRD countries)",
  "Sub-Saharan Africa (excluding high income)",
  "Upper middle income",
  "World"
)
Function for cleaning World Bank data:
#' Reduce a wide World Bank indicator table to one recent value per country
#'
#' Drops the rows listed in `exclude`, keeps only the requested year columns,
#' and returns, for each `Country.Name`, the value from the most recent year
#' in the window that is not `NA`. Countries with no non-missing value in the
#' window do not appear in the result.
#'
#' @param df Data frame with columns `Country.Name`, `Country.Code` and one
#'   `X<year>` column for every year in `years`.
#' @param measure Single string; the name given to the value column of the
#'   result.
#' @param years Numeric vector of years whose `X<year>` columns are
#'   considered. Defaults to 2020-2024.
#' @param exclude Character vector of `Country.Name` entries to drop.
#'   Defaults to the script-level `nonCountryDatapointsa`.
#' @return A tibble with one row per remaining `Country.Name` and the columns
#'   `Country.Name`, `<measure>` and `Country.Code`.
clean_WB <- function(df, measure, years = 2020:2024,
                     exclude = nonCountryDatapointsa) {
  stopifnot(is.character(measure), length(measure) == 1L)

  # Ascending order matters: after pivoting, last() must pick the latest year.
  year_cols <- paste0("X", sort(unique(years)))

  df %>%
    dplyr::filter(!Country.Name %in% exclude) %>%
    dplyr::select(Country.Name, Country.Code, all_of(year_cols)) %>%
    pivot_longer(
      cols = all_of(year_cols),
      names_to = "Year",
      values_to = "value"
    ) %>%
    # Remove missing years first so last() returns the newest observed value.
    drop_na(value) %>%
    group_by(Country.Name) %>%
    summarise(
      value = last(value),
      Country.Code = last(Country.Code),
      .groups = "drop"
    ) %>%
    rename(!!measure := value)
}
# Preview the first six rows of the raw female labour-force participation table.
head(data_FemaleLFPR, n = 6L)
## Country.Name Country.Code
## 1 Aruba ABW
## 2 Africa Eastern and Southern AFE
## 3 Afghanistan AFG
## 4 Africa Western and Central AFW
## 5 Angola AGO
## 6 Albania ALB
## Indicator.Name
## 1 Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)
## 2 Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)
## 3 Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)
## 4 Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)
## 5 Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)
## 6 Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)
## Indicator.Code X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969
## 1 SL.TLF.CACT.FE.ZS NA NA NA NA NA NA NA NA NA NA
## 2 SL.TLF.CACT.FE.ZS NA NA NA NA NA NA NA NA NA NA
## 3 SL.TLF.CACT.FE.ZS NA NA NA NA NA NA NA NA NA NA
## 4 SL.TLF.CACT.FE.ZS NA NA NA NA NA NA NA NA NA NA
## 5 SL.TLF.CACT.FE.ZS NA NA NA NA NA NA NA NA NA NA
## 6 SL.TLF.CACT.FE.ZS NA NA NA NA NA NA NA NA NA NA
## X1970 X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA
## X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993
## 1 NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA 65.53770 65.69234 65.85133 65.93088
## 3 NA NA NA NA NA NA NA 15.78900 15.74000 15.66000 15.54400
## 4 NA NA NA NA NA NA NA 69.94014 69.93172 69.81323 69.77922
## 5 NA NA NA NA NA NA NA 75.60700 75.57200 75.54000 75.51300
## 6 NA NA NA NA NA NA NA 50.84600 53.40700 54.03600 53.20400
## X1994 X1995 X1996 X1997 X1998 X1999 X2000 X2001
## 1 NA NA NA NA NA NA NA NA
## 2 66.13733 66.33431 66.32750 66.23818 66.22135 66.23986 66.26010 66.27387
## 3 15.39300 15.22600 15.04400 14.84800 14.65300 14.47100 14.32100 14.22300
## 4 69.73839 69.67772 69.57907 69.49287 69.40707 69.40180 69.35713 69.29550
## 5 75.49200 75.48100 75.48000 75.48600 75.49800 75.51300 75.53100 75.55100
## 6 52.51400 51.52100 50.88000 51.71000 51.08600 50.26600 49.84900 49.35300
## X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 NA NA NA NA NA NA NA NA
## 2 66.35391 66.44016 66.47926 66.52541 66.37408 66.13387 65.88168 65.51176
## 3 14.17700 14.17800 14.22800 14.32300 14.47200 14.68600 14.95700 15.25600
## 4 69.15380 69.02790 68.90672 68.76926 68.63647 68.54642 68.31940 68.07177
## 5 75.57000 75.58900 75.60600 75.61800 75.62600 75.63100 75.63100 75.62600
## 6 49.08800 48.46100 47.84500 47.24200 46.65200 46.07600 45.51300 45.95500
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017
## 1 NA NA NA NA NA NA NA NA
## 2 64.77445 65.32562 65.16964 65.12264 64.16107 63.71232 63.12078 62.39547
## 3 15.57600 15.89500 16.20800 17.13800 18.10700 19.11300 20.15600 21.24000
## 4 67.82694 67.69748 67.49655 67.14115 66.78181 66.46139 66.23346 65.72373
## 5 75.62100 74.86500 74.63300 74.40100 74.16700 73.93300 73.69800 73.46100
## 6 46.83500 52.56100 48.93700 43.78300 43.95800 47.11600 49.88800 49.73600
## X2018 X2019 X2020 X2021 X2022 X2023 X2024 X
## 1 NA NA NA NA NA NA NA NA
## 2 61.84047 61.35699 59.77301 60.27955 60.44514 64.07521 63.90776 NA
## 3 19.80800 18.30400 16.47300 14.66600 5.15900 5.15500 5.10400 NA
## 4 65.42507 65.17822 64.43911 64.78000 64.73852 66.34934 66.18173 NA
## 5 73.22400 72.98500 72.73100 74.69500 73.17900 73.22400 73.14000 NA
## 6 51.40700 52.93000 50.61600 51.88700 53.18300 53.20700 53.51200 NA
# Preview the first six rows of the raw total labour-force participation table.
head(data_LFPR, n = 6L)
## Country.Name Country.Code
## 1 Aruba ABW
## 2 Africa Eastern and Southern AFE
## 3 Afghanistan AFG
## 4 Africa Western and Central AFW
## 5 Angola AGO
## 6 Albania ALB
## Indicator.Name
## 1 Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)
## 2 Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)
## 3 Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)
## 4 Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)
## 5 Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)
## 6 Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)
## Indicator.Code X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969
## 1 SL.TLF.CACT.ZS NA NA NA NA NA NA NA NA NA NA
## 2 SL.TLF.CACT.ZS NA NA NA NA NA NA NA NA NA NA
## 3 SL.TLF.CACT.ZS NA NA NA NA NA NA NA NA NA NA
## 4 SL.TLF.CACT.ZS NA NA NA NA NA NA NA NA NA NA
## 5 SL.TLF.CACT.ZS NA NA NA NA NA NA NA NA NA NA
## 6 SL.TLF.CACT.ZS NA NA NA NA NA NA NA NA NA NA
## X1970 X1971 X1972 X1973 X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA
## X1983 X1984 X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993
## 1 NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA 72.11155 72.17933 72.27927 72.27015
## 3 NA NA NA NA NA NA NA 47.25100 47.19800 47.13700 47.06700
## 4 NA NA NA NA NA NA NA 76.59380 76.61586 76.51132 76.54951
## 5 NA NA NA NA NA NA NA 77.27100 77.31900 77.36400 77.40400
## 6 NA NA NA NA NA NA NA 61.78300 64.29900 64.88300 64.05700
## X1994 X1995 X1996 X1997 X1998 X1999 X2000 X2001
## 1 NA NA NA NA NA NA NA NA
## 2 72.35346 72.49353 72.52087 72.48101 72.41696 72.33893 72.28231 72.22234
## 3 46.98600 46.90500 46.82700 46.75100 46.68200 46.62100 46.57100 46.53300
## 4 76.56316 76.53043 76.41852 76.33737 76.26321 76.23509 76.11999 75.98527
## 5 77.43900 77.45600 77.45600 77.44700 77.43000 77.41100 77.38900 77.36400
## 6 63.35800 62.34300 61.66300 62.46400 61.79500 60.91300 60.43500 59.88800
## X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 NA NA NA NA NA NA NA NA
## 2 72.20879 72.20830 72.16941 72.14703 71.98197 71.76300 71.53610 71.14870
## 3 46.50800 46.49900 46.50700 46.53200 46.57000 46.62100 46.68000 46.74300
## 4 75.68472 75.49284 75.29329 75.10964 74.93536 74.73455 74.43295 74.09511
## 5 77.33900 77.31300 77.29000 77.27200 77.26000 77.25200 77.25200 77.25600
## 6 59.61000 58.55700 57.49600 56.42800 55.35400 54.27500 53.19200 54.99300
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017
## 1 NA NA NA NA NA NA NA NA
## 2 70.46491 70.92204 70.87421 70.83214 69.95276 69.62207 69.13831 68.50009
## 3 46.81100 46.88100 46.95600 47.02600 47.09600 47.16500 47.23500 47.30500
## 4 73.76231 73.53483 73.25900 72.90333 72.53744 72.27095 72.21715 71.75875
## 5 77.26000 77.31500 77.03700 76.75600 76.47300 76.18700 75.89900 75.60900
## 6 55.20200 59.93800 56.99500 52.41800 53.42000 55.49700 57.31400 58.05700
## X2018 X2019 X2020 X2021 X2022 X2023 X2024 X
## 1 NA NA NA NA NA NA NA NA
## 2 68.08854 67.74673 66.29093 66.79551 67.00764 69.30694 69.18241 NA
## 3 45.57000 43.82300 41.57900 40.93200 37.64000 37.67300 37.49600 NA
## 4 71.53272 71.32954 70.80324 70.90759 70.91841 71.41612 71.28294 NA
## 5 75.31600 75.02100 74.72900 76.37200 75.61500 75.75000 75.70300 NA
## 6 59.29600 60.30600 57.78000 58.86900 60.42100 60.43200 60.67500 NA
# Collapse both labour-force tables to one value per country with clean_WB(),
# then preview the female series.
data_FemaleLFPR_tidy <- data_FemaleLFPR %>%
  clean_WB(measure = "Female.LFPR")
data_LFPR_tidy <- data_LFPR %>%
  clean_WB(measure = "LFPR")
data_FemaleLFPR_tidy %>%
  head()
## # A tibble: 6 × 3
## Country.Name Female.LFPR Country.Code
## <chr> <dbl> <chr>
## 1 Afghanistan 5.10 AFG
## 2 Albania 53.5 ALB
## 3 Algeria 14.0 DZA
## 4 Angola 73.1 AGO
## 5 Argentina 52.9 ARG
## 6 Armenia 56.4 ARM
# Preview the first six rows of the raw female-share-of-population table.
head(data_PropFemale, n = 6L)
## Country.Name Country.Code
## 1 Aruba ABW
## 2 Africa Eastern and Southern AFE
## 3 Afghanistan AFG
## 4 Africa Western and Central AFW
## 5 Angola AGO
## 6 Albania ALB
## Indicator.Name Indicator.Code X1960
## 1 Population, female (% of total population) SP.POP.TOTL.FE.ZS 50.84848
## 2 Population, female (% of total population) SP.POP.TOTL.FE.ZS 50.48167
## 3 Population, female (% of total population) SP.POP.TOTL.FE.ZS 48.08829
## 4 Population, female (% of total population) SP.POP.TOTL.FE.ZS 50.36868
## 5 Population, female (% of total population) SP.POP.TOTL.FE.ZS 49.36403
## 6 Population, female (% of total population) SP.POP.TOTL.FE.ZS 49.63153
## X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968
## 1 50.83395 50.83097 50.83069 50.82525 50.82660 50.83825 50.84082 50.83716
## 2 50.48439 50.48799 50.49292 50.49852 50.50500 50.51271 50.51805 50.52027
## 3 48.16973 48.24648 48.31892 48.38751 48.45271 48.51496 48.57419 48.62915
## 4 50.39844 50.42297 50.43892 50.45402 50.46745 50.47840 50.49145 50.50495
## 5 49.24822 49.17658 49.10501 49.03395 48.96378 48.89516 48.82515 48.75194
## 6 49.61255 49.57760 49.54491 49.51565 49.49047 49.46999 49.45644 49.45469
## X1969 X1970 X1971 X1972 X1973 X1974 X1975 X1976
## 1 50.83994 50.84648 50.86084 50.88378 50.91864 50.95473 50.98665 51.03230
## 2 50.52144 50.52783 50.53253 50.53259 50.52867 50.52338 50.52177 50.52395
## 3 48.67991 48.72774 48.77298 48.81549 48.85468 48.89056 48.92343 48.95351
## 4 50.51338 50.51154 50.49962 50.48708 50.47719 50.46908 50.46391 50.45860
## 5 48.67509 48.70903 48.85272 48.99319 49.12712 49.25198 49.37374 49.49393
## 6 49.44856 49.42667 49.40089 49.37683 49.35579 49.33779 49.32308 49.31141
## X1977 X1978 X1979 X1980 X1981 X1982 X1983 X1984
## 1 51.09413 51.15201 51.20927 51.26292 51.31129 51.31917 51.28851 51.24941
## 2 50.53077 50.54149 50.55006 50.52845 50.51003 50.51416 50.50989 50.51295
## 3 48.98069 49.01610 49.07308 49.14256 49.21562 49.31879 49.43961 49.60103
## 4 50.45299 50.44948 50.44576 50.43786 50.42582 50.41522 50.40657 50.39124
## 5 49.60769 49.71315 49.81089 49.90154 49.98563 50.06350 50.14279 50.22306
## 6 49.30178 49.29769 49.30832 49.32752 49.34743 49.37306 49.40450 49.44087
## X1985 X1986 X1987 X1988 X1989 X1990 X1991 X1992
## 1 51.19223 51.12383 51.04088 50.94772 50.89842 50.92887 51.04819 51.16519
## 2 50.50629 50.48600 50.46638 50.44993 50.44242 50.43587 50.43400 50.48089
## 3 49.79831 49.93787 50.02022 50.07163 50.08772 50.09682 50.10532 50.11505
## 4 50.37116 50.35338 50.33514 50.31360 50.29017 50.26808 50.24906 50.22856
## 5 50.29688 50.36424 50.43869 50.51637 50.58429 50.64631 50.69250 50.73150
## 6 49.48208 49.52579 49.56762 49.60832 49.67099 49.76549 49.87355 49.98902
## X1993 X1994 X1995 X1996 X1997 X1998 X1999 X2000
## 1 51.23609 51.30823 51.40942 51.53156 51.63796 51.73496 51.82602 51.88878
## 2 50.42971 50.46689 50.63644 50.76851 50.85067 50.84193 50.83800 50.83103
## 3 50.09946 50.07072 50.04250 50.00597 49.96733 49.93187 49.89475 49.85354
## 4 50.20727 50.18972 50.17463 50.16033 50.14760 50.13005 50.10750 50.08628
## 5 50.79308 50.86074 50.88272 50.87011 50.85717 50.85390 50.86738 50.87913
## 6 50.11363 50.24616 50.38601 50.53274 50.68744 50.84856 51.01128 51.17801
## X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1 51.94065 51.98787 52.01980 52.05175 52.09700 52.14743 52.19647 52.24375
## 2 50.81932 50.80429 50.78521 50.76560 50.74651 50.72599 50.70553 50.68732
## 3 49.81444 49.77482 49.73536 49.69777 49.66246 49.63251 49.60850 49.58623
## 4 50.06633 50.04202 50.01321 49.98585 49.96319 49.94412 49.92527 49.90073
## 5 50.88612 50.88872 50.87450 50.85106 50.82651 50.80062 50.77388 50.74781
## 6 51.24742 51.18176 51.08517 50.99288 50.90592 50.82527 50.75016 50.67827
## X2009 X2010 X2011 X2012 X2013 X2014 X2015 X2016
## 1 52.28882 52.27940 52.28857 52.36481 52.43816 52.50597 52.56565 52.62060
## 2 50.67264 50.65775 50.64105 50.62561 50.60957 50.59146 50.56575 50.55426
## 3 49.56271 49.53944 49.51849 49.49977 49.48265 49.46812 49.45720 49.44955
## 4 49.87676 49.85371 49.82468 49.79561 49.76611 49.74448 49.72998 49.71410
## 5 50.72268 50.69876 50.67764 50.65990 50.64424 50.63001 50.61682 50.60359
## 6 50.60831 50.54083 50.51035 50.51471 50.51708 50.51681 50.51462 50.51069
## X2017 X2018 X2019 X2020 X2021 X2022 X2023 X2024 X
## 1 52.67379 52.73132 52.79614 52.84451 52.83196 52.79267 52.76962 NA NA
## 2 50.55150 50.54016 50.53181 50.52512 50.51942 50.51171 50.50284 NA NA
## 3 49.45492 49.47445 49.49391 49.51122 49.53438 49.54101 49.52551 NA NA
## 4 49.69888 49.68481 49.67276 49.66354 49.65476 49.64911 49.64667 NA NA
## 5 50.59035 50.57764 50.56543 50.55582 50.54686 50.53626 50.52589 NA NA
## 6 50.50650 50.50253 50.49691 50.50732 50.53210 50.54948 50.55923 NA NA
# Preview the first six rows of the raw population age-structure table.
head(data_PopulationAgeStructure, n = 6L)
## flagCode country AgeStructure_PctAt0To14_pct_2024
## 1 NE Niger 49.5
## 2 UG Uganda 47.0
## 3 AO Angola 46.9
## 4 ML Mali 46.8
## 5 TD Chad 45.8
## 6 CD DR Congo 45.7
## AgeStructure_PctAt15To64_pct_2024 AgeStructure_PctAt65Plus_pct_2024
## 1 47.8 2.7
## 2 50.6 2.4
## 3 50.7 2.4
## 4 50.1 3.1
## 5 51.7 2.5
## 6 51.8 2.5
# Preview the first six rows of the raw total-population table.
head(data_Population, n = 6L)
## Country.Name Country.Code Indicator.Name Indicator.Code
## 1 Aruba ABW Population, total SP.POP.TOTL
## 2 Africa Eastern and Southern AFE Population, total SP.POP.TOTL
## 3 Afghanistan AFG Population, total SP.POP.TOTL
## 4 Africa Western and Central AFW Population, total SP.POP.TOTL
## 5 Angola AGO Population, total SP.POP.TOTL
## 6 Albania ALB Population, total SP.POP.TOTL
## X1960 X1961 X1962 X1963 X1964 X1965 X1966
## 1 54922 55578 56320 57002 57619 58190 58694
## 2 130072080 133534923 137171659 140945536 144904094 149033472 153281203
## 3 9035043 9214083 9404406 9604487 9814318 10036008 10266395
## 4 97630925 99706674 101854756 104089175 106388440 108772632 111246953
## 5 5231654 5301583 5354310 5408320 5464187 5521981 5581386
## 6 1608800 1659800 1711319 1762621 1814135 1864791 1914573
## X1967 X1968 X1969 X1970 X1971 X1972 X1973
## 1 58990 59069 59052 58950 58781 58047 58299
## 2 157704381 162329396 167088245 171984985 177022314 182126556 187524135
## 3 10505959 10756922 11017409 11290128 11567667 11853696 12157999
## 4 113795019 116444636 119203521 122086536 125072948 128176494 131449942
## 5 5641807 5702699 5763685 5852788 5991102 6174262 6388528
## 6 1965598 2022272 2081695 2135479 2187853 2243126 2296752
## X1974 X1975 X1976 X1977 X1978 X1979 X1980
## 1 58349 58295 58368 58580 58776 59191 59909
## 2 193186642 198914573 204802976 210680842 217074286 223974122 230792729
## 3 12469127 12773954 13059851 13340756 13611441 13655567 13169311
## 4 134911581 138569918 142337272 146258576 150402616 154721711 159166518
## 5 6613367 6842947 7074664 7317829 7576734 7847207 8133872
## 6 2350124 2404831 2458526 2513546 2566266 2617832 2671997
## X1981 X1982 X1983 X1984 X1985 X1986 X1987
## 1 60563 61276 62228 62901 61728 59931 59159
## 2 238043099 245822010 253644643 261458202 269450407 277621771 286067346
## 3 11937581 10991378 10917982 11190221 11426852 11420074 11387818
## 4 163762473 168585118 173255157 177880746 182811038 187889141 193104347
## 5 8435607 8751648 9082983 9425917 9779120 10139450 10497858
## 6 2726056 2784278 2843960 2904429 2964762 3022635 3083605
## X1988 X1989 X1990 X1991 X1992 X1993 X1994
## 1 59331 60443 62753 65896 69005 73685 77595
## 2 294498625 302939121 311748681 320442961 329082707 338324002 347441809
## 3 11523298 11874088 12045660 12238879 13278974 14943172 16250794
## 4 198485027 204062274 209566031 215178709 221191375 227246778 233360104
## 5 10861291 11238562 11626360 12023529 12423712 12827135 13249764
## 6 3142336 3227943 3286542 3266790 3247039 3227287 3207536
## X1995 X1996 X1997 X1998 X1999 X2000 X2001
## 1 79805 83021 86301 88451 89659 90588 91439
## 2 356580375 366138524 375646235 385505757 395750933 406156661 416807868
## 3 17065836 17763266 18452091 19159996 19887785 20130327 20284307
## 4 239801875 246415446 253207584 260297834 267506298 274968446 282780717
## 5 13699778 14170973 14660413 15159370 15667235 16194869 16747208
## 6 3187784 3168033 3148281 3128530 3108778 3089027 3060173
## X2002 X2003 X2004 X2005 X2006 X2007 X2008
## 1 92074 93128 95138 97635 99405 100150 100917
## 2 427820358 439173286 450928044 463076637 475606210 488580707 502070763
## 3 21378117 22733049 23560654 24404567 25424094 25909852 26482622
## 4 290841795 299142845 307725100 316588476 325663158 334984176 344586109
## 5 17327699 17943712 18600423 19291161 20015279 20778561 21578655
## 6 3051010 3039616 3026939 3011487 2992547 2970017 2947314
## X2009 X2010 X2011 X2012 X2013 X2014 X2015
## 1 101604 101838 102591 104110 105675 106807 107906
## 2 516003448 530308387 544737983 559609961 575202699 590968990 607123269
## 3 27466101 28284089 29347708 30560034 31622704 32792523 33831764
## 4 354343844 364358270 374790143 385360349 396030207 406992047 418127845
## 5 22414773 23294825 24218352 25177394 26165620 27160769 28157798
## 6 2927519 2913021 2905195 2900401 2895092 2889104 2880703
## X2016 X2017 X2018 X2019 X2020 X2021 X2022
## 1 108727 108735 108908 109203 108587 107700 107310
## 2 623369401 640058741 657801085 675950189 694446100 713090928 731821393
## 3 34700612 35688935 36743039 37856121 39068979 40000412 40578842
## 4 429454743 440882906 452195915 463365429 474569351 485920997 497387180
## 5 29183070 30234839 31297155 32375632 33451132 34532429 35635029
## 6 2876101 2873457 2866376 2854191 2837849 2811666 2777689
## X2023 X2024 X
## 1 107359 NA NA
## 2 750503764 NA NA
## 3 41454761 NA NA
## 4 509398589 NA NA
## 5 36749906 NA NA
## 6 2745972 NA NA
# Preview the first six rows of the raw fertility-rate table.
head(data_FertilityRate, n = 6L)
## Country.Name Country.Code
## 1 Aruba ABW
## 2 Africa Eastern and Southern AFE
## 3 Afghanistan AFG
## 4 Africa Western and Central AFW
## 5 Angola AGO
## 6 Albania ALB
## Indicator.Name Indicator.Code X1960 X1961
## 1 Fertility rate, total (births per woman) SP.DYN.TFRT.IN 4.567000 4.422000
## 2 Fertility rate, total (births per woman) SP.DYN.TFRT.IN 6.650330 6.667308
## 3 Fertility rate, total (births per woman) SP.DYN.TFRT.IN 7.282000 7.284000
## 4 Fertility rate, total (births per woman) SP.DYN.TFRT.IN 6.468887 6.478351
## 5 Fertility rate, total (births per woman) SP.DYN.TFRT.IN 6.708000 6.790000
## 6 Fertility rate, total (births per woman) SP.DYN.TFRT.IN 6.383000 6.273000
## X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969
## 1 4.262000 4.107000 3.940000 3.797000 3.621000 3.452000 3.277000 3.111000
## 2 6.688246 6.709226 6.724930 6.737459 6.766486 6.775493 6.782523 6.782813
## 3 7.292000 7.302000 7.304000 7.305000 7.320000 7.339000 7.363000 7.389000
## 4 6.492277 6.500229 6.516739 6.532766 6.556550 6.582717 6.607297 6.633569
## 5 6.872000 6.954000 7.036000 7.116000 7.194000 7.267000 7.332000 7.388000
## 6 6.106000 5.927000 5.714000 5.474000 5.325000 5.310000 5.317000 5.287000
## X1970 X1971 X1972 X1973 X1974 X1975 X1976 X1977
## 1 2.973000 2.862000 2.755000 2.655000 2.573000 2.499000 2.432000 2.372000
## 2 6.792709 6.798998 6.798352 6.796038 6.792972 6.783362 6.768292 6.749405
## 3 7.400000 7.432000 7.453000 7.487000 7.526000 7.542000 7.561000 7.591000
## 4 6.655836 6.697323 6.733532 6.764148 6.802286 6.839113 6.858526 6.890310
## 5 7.434000 7.467000 7.488000 7.498000 7.500000 7.494000 7.485000 7.475000
## 6 5.158000 5.083000 4.979000 4.829000 4.700000 4.566000 4.374000 4.204000
## X1978 X1979 X1980 X1981 X1982 X1983 X1984 X1985
## 1 2.312000 2.257000 2.203000 2.161000 2.142000 2.148000 2.170000 2.188000
## 2 6.733325 6.715178 6.694612 6.663769 6.635449 6.595348 6.549886 6.500241
## 3 7.599000 7.612000 7.643000 7.617000 7.600000 7.570000 7.554000 7.550000
## 4 6.916096 6.904869 6.888044 6.871587 6.846727 6.819426 6.770109 6.717738
## 5 7.467000 7.461000 7.459000 7.459000 7.461000 7.462000 7.459000 7.451000
## 6 3.996000 3.782000 3.590000 3.468000 3.416000 3.319000 3.282000 3.214000
## X1986 X1987 X1988 X1989 X1990 X1991 X1992 X1993
## 1 2.212000 2.243000 2.273000 2.306000 2.345000 2.362000 2.353000 2.331000
## 2 6.448271 6.398680 6.329041 6.249929 6.160212 6.094846 6.028219 5.970246
## 3 7.553000 7.548000 7.551000 7.559000 7.576000 7.631000 7.703000 7.761000
## 4 6.676484 6.629947 6.594412 6.562254 6.516886 6.470298 6.417682 6.362002
## 5 7.435000 7.409000 7.373000 7.328000 7.272000 7.208000 7.138000 7.065000
## 6 3.141000 3.106000 3.069000 3.030000 3.014000 2.956000 2.885000 2.811000
## X1994 X1995 X1996 X1997 X1998 X1999 X2000 X2001
## 1 2.298000 2.288000 2.233000 2.138000 2.001000 1.897000 1.845000 1.813000
## 2 5.911238 5.853480 5.781580 5.710820 5.662886 5.611619 5.549893 5.501766
## 3 7.767000 7.767000 7.757000 7.732000 7.693000 7.641000 7.566000 7.453000
## 4 6.299172 6.236263 6.171442 6.103496 6.044794 6.027932 6.022143 5.990074
## 5 6.990000 6.918000 6.851000 6.789000 6.732000 6.683000 6.639000 6.601000
## 6 2.798000 2.762000 2.666000 2.535000 2.425000 2.313000 2.217000 2.141000
## X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 1.800000 1.808000 1.819000 1.844000 1.862000 1.881000 1.889000 1.875000
## 2 5.449696 5.407328 5.381308 5.350399 5.305920 5.253985 5.219403 5.142535
## 3 7.320000 7.174000 7.018000 6.858000 6.686000 6.508000 6.392000 6.295000
## 4 5.956202 5.923573 5.895514 5.871770 5.846028 5.809651 5.785473 5.751120
## 5 6.567000 6.533000 6.499000 6.461000 6.419000 6.372000 6.320000 6.260000
## 6 2.002000 2.006000 1.889000 1.787000 1.686000 1.633000 1.616000 1.638000
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017
## 1 1.855000 1.858000 1.907000 1.944000 1.944000 1.899000 1.848000 1.785000
## 2 5.066497 4.981016 4.899585 4.819563 4.748074 4.676508 4.615744 4.569884
## 3 6.195000 6.094000 5.985000 5.879000 5.770000 5.652000 5.542000 5.433000
## 4 5.712896 5.665667 5.596800 5.515468 5.440018 5.346596 5.228976 5.098885
## 5 6.194000 6.120000 6.039000 5.953000 5.864000 5.774000 5.686000 5.600000
## 6 1.653000 1.677000 1.719000 1.741000 1.721000 1.631000 1.555000 1.486000
## X2018 X2019 X2020 X2021 X2022 X2023 X2024 X
## 1 1.732000 1.701000 1.662000 1.631000 1.615000 1.602000 NA NA
## 2 4.521443 4.471338 4.412973 4.350683 4.287033 4.223771 NA NA
## 3 5.327000 5.238000 5.145000 5.039000 4.932000 4.840000 NA NA
## 4 4.962571 4.829134 4.707399 4.637741 4.563354 4.497707 NA NA
## 5 5.519000 5.442000 5.371000 5.304000 5.209000 5.124000 NA NA
## 6 1.415000 1.395000 1.371000 1.365000 1.355000 1.348000 NA NA
# Tidy the remaining World Bank indicators with clean_WB().
data_Population_tidy <- clean_WB(data_Population, "Population")
data_FertilityRate_tidy <- clean_WB(data_FertilityRate, "Fertility.Rate")
data_TertiaryEducation_tidy <- clean_WB(
  data_TertiaryEducation,
  "Tertiary.Education"
)
# Legacy alias: this object was originally created under the misspelled
# suffix "_tiday". Keep that name so any code still using it keeps working.
data_TertiaryEducation_tiday <- data_TertiaryEducation_tidy
data_PropFemale_tidy <- clean_WB(data_PropFemale, "Prop.Female")
# Relabel countries and shorten the three age-share column names.
# The replacement labels are presumably the Country.Name spellings used by
# the World Bank tables, so the tables can be matched by name - confirm.
# NOTE(review): the 15-64 share ends up in a column called X15to65_2024; the
# name is kept unchanged because other code may refer to it.
PAS_temp <- data_PopulationAgeStructure %>%
  mutate(
    country = recode(
      country,
      !!!c(
        "DR Congo" = "Congo, Dem. Rep.",
        "Republic of the Congo" = "Congo, Rep.",
        "Egypt" = "Egypt, Arab Rep.",
        "Gambia" = "Gambia, The",
        "Syria" = "Syrian Arab Republic",
        "Palestine" = "West Bank and Gaza",
        "Ivory Coast" = "Cote d'Ivoire",
        "Kyrgyzstan" = "Kyrgyz Republic",
        "Cape Verde" = "Cabo Verde",
        "Laos" = "Lao PDR",
        "Iran" = "Iran, Islamic Rep.",
        "Bahamas" = "Bahamas, The",
        "Brunei" = "Brunei Darussalam",
        "South Korea" = "Korea, Rep.",
        "Vietnam" = "Viet Nam",
        "Saint Kitts and Nevis" = "St. Kitts and Nevis",
        "Sint Maarten" = "Sint Maarten (Dutch part)",
        "Turkey" = "Turkiye",
        "Czech Republic" = "Czechia",
        "Macau" = "Macao SAR, China",
        "Russia" = "Russian Federation",
        "Hong Kong" = "Hong Kong SAR, China",
        "Slovakia" = "Slovak Republic",
        "United States Virgin Islands" = "Virgin Islands (U.S.)",
        "Saint Vincent and the Grenadines" = "St. Vincent and the Grenadines",
        "Saint Lucia" = "St. Lucia",
        "Micronesia" = "Micronesia, Fed. Sts.",
        "North Korea" = "Korea, Dem. People's Rep.",
        "Venezuela" = "Venezuela, RB",
        "Yemen" = "Yemen, Rep.",
        "Saint Martin" = "St. Martin (French part)"
      )
    )
  ) %>%
  rename(
    X0to14_2024 = AgeStructure_PctAt0To14_pct_2024,
    X15to65_2024 = AgeStructure_PctAt15To64_pct_2024,
    X65plus_2024 = AgeStructure_PctAt65Plus_pct_2024
  )
# Replace the separate Jersey and Guernsey rows with a single
# "Channel Islands" row whose age shares are the population-weighted average
# of the two islands (both populations are from World Population Review).
data_PopulationAgeStructure_tidy <- local({
  jersey_pop <- 103989
  guernsey_pop <- 64477

  # Weighted average of one age-share column across the two islands.
  island_share <- function(column) {
    jersey <- PAS_temp[PAS_temp$country == "Jersey", column]
    guernsey <- PAS_temp[PAS_temp$country == "Guernsey", column]
    (jersey * jersey_pop + guernsey * guernsey_pop) / 168466
  }

  with_channel_islands <- add_row(
    PAS_temp,
    flagCode = NA,
    country = "Channel Islands",
    X0to14_2024 = island_share("X0to14_2024"),
    X15to65_2024 = island_share("X15to65_2024"),
    X65plus_2024 = island_share("X65plus_2024")
  )

  # The individual islands are now represented by the combined row.
  filter(with_channel_islands, !country %in% c("Jersey", "Guernsey"))
})
# Preview the first six rows of the raw GDP-per-capita (PPP) table.
head(data_GDPPerCapita, n = 6L)
## Country.Name Country.Code
## 1 Aruba ABW
## 2 Africa Eastern and Southern AFE
## 3 Afghanistan AFG
## 4 Africa Western and Central AFW
## 5 Angola AGO
## 6 Albania ALB
## Indicator.Name Indicator.Code X1960 X1961
## 1 GDP per capita, PPP (current international $) NY.GDP.PCAP.PP.CD NA NA
## 2 GDP per capita, PPP (current international $) NY.GDP.PCAP.PP.CD NA NA
## 3 GDP per capita, PPP (current international $) NY.GDP.PCAP.PP.CD NA NA
## 4 GDP per capita, PPP (current international $) NY.GDP.PCAP.PP.CD NA NA
## 5 GDP per capita, PPP (current international $) NY.GDP.PCAP.PP.CD NA NA
## 6 GDP per capita, PPP (current international $) NY.GDP.PCAP.PP.CD NA NA
## X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970 X1971 X1972 X1973 X1974
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA
## X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983 X1984 X1985 X1986 X1987
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA
## X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995
## 1 NA NA 21732.858 23099.940 23889.045 24575.661 25791.043 26254.743
## 2 NA NA 1804.850 1817.905 1775.858 1766.045 1788.170 1858.436
## 3 NA NA NA NA NA NA NA NA
## 4 NA NA 1727.098 1759.836 1791.207 1764.645 1753.347 1778.289
## 5 NA NA 3340.598 3372.597 3143.441 2369.242 2374.021 2695.806
## 6 NA NA 2549.242 1908.942 1823.142 2057.284 2289.685 2665.542
## X1996 X1997 X1998 X1999 X2000 X2001 X2002
## 1 26004.496 27240.803 27412.755 27765.653 30245.7070 31920.239 31888.5087
## 2 1944.803 2011.694 2019.400 2052.057 2116.2394 2186.630 2249.0415
## 3 NA NA NA NA 813.5503 747.688 926.5079
## 4 1844.819 1908.391 1946.772 1953.486 2016.0918 2108.608 2278.4623
## 5 3013.333 3178.495 3254.244 3262.976 3326.7799 3427.854 3824.2848
## 6 2979.809 2717.129 3021.036 3471.650 3861.2959 4300.829 4661.3865
## X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 32507.084 35059.2731 35098.798 35937.595 37768.566 38904.999 34339.939
## 2 2308.118 2438.8993 2602.006 2785.792 2971.975 3082.151 3041.089
## 3 966.962 971.6335 1076.087 1121.834 1286.950 1333.747 1570.698
## 4 2383.607 2567.0755 2722.787 2873.342 3024.431 3185.419 3305.684
## 5 3878.508 4262.9060 4876.288 5404.384 6095.999 6651.378 6498.169
## 6 5000.329 5427.8809 5865.322 6566.515 7285.035 8228.343 8812.729
## X2010 X2011 X2012 X2013 X2014 X2015 X2016
## 1 33729.512 35324.072 34095.647 35901.653 35657.287 35972.866 36117.508
## 2 3149.020 3253.724 3171.570 3290.844 3426.576 3456.365 3551.069
## 3 1765.538 1744.061 1988.429 2133.241 2224.491 2284.076 2213.181
## 4 3477.320 3620.650 3735.925 3899.646 4103.766 4052.734 3996.864
## 5 6607.022 6711.390 7354.943 7561.039 7990.274 7119.726 6843.736
## 6 9627.114 10207.726 10526.242 10570.989 11259.240 11662.036 12078.859
## X2017 X2018 X2019 X2020 X2021 X2022 X2023 X2024
## 1 37524.928 39287.020 39110.276 28976.464 35696.309 41649.451 44967.345 NA
## 2 3703.887 3648.311 3742.849 3629.508 3907.952 4229.682 4374.230 NA
## 3 2335.796 2432.277 2583.485 2561.982 2144.167 2122.996 2211.281 NA
## 4 4063.185 4198.081 4454.156 4441.937 4698.927 5107.273 5343.469 NA
## 5 6992.729 7347.800 7528.382 6450.750 7408.127 7924.889 8040.702 NA
## 6 12771.034 13696.789 14792.257 14511.984 16127.753 19446.237 21263.196 NA
## X
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
# Preview the first rows of the raw Gini index table to check how it was parsed.
head(data_GiniCoefficient)
## Country.Name Country.Code Indicator.Name Indicator.Code X1960
## 1 Aruba ABW Gini index SI.POV.GINI NA
## 2 Africa Eastern and Southern AFE Gini index SI.POV.GINI NA
## 3 Afghanistan AFG Gini index SI.POV.GINI NA
## 4 Africa Western and Central AFW Gini index SI.POV.GINI NA
## 5 Angola AGO Gini index SI.POV.GINI NA
## 6 Albania ALB Gini index SI.POV.GINI NA
## X1961 X1962 X1963 X1964 X1965 X1966 X1967 X1968 X1969 X1970 X1971 X1972 X1973
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA
## X1974 X1975 X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983 X1984 X1985 X1986
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA NA NA NA NA
## X1987 X1988 X1989 X1990 X1991 X1992 X1993 X1994 X1995 X1996 X1997 X1998 X1999
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA NA NA 27 NA NA NA
## X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009 X2010 X2011 X2012
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 51.9 NA NA NA NA NA NA NA 42.7 NA NA NA NA
## 6 NA NA 31.7 NA NA 30.6 NA NA 30.0 NA NA NA 29
## X2013 X2014 X2015 X2016 X2017 X2018 X2019 X2020 X2021 X2022 X2023 X2024 X
## 1 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA NA NA NA NA NA NA
## 5 NA NA NA NA NA 51.3 NA NA NA NA NA NA NA
## 6 NA 34.6 32.8 33.7 33.1 30.1 30.1 29.4 NA NA NA NA NA
# Tidy each raw World Bank table with clean_WB() (defined elsewhere in this
# file). The second argument is presumably the name of the resulting value
# column -- later code refers to columns such as GDP.PC, Agri and Serv;
# confirm against the clean_WB() definition.
data_GDPPerCapita_tidy <- clean_WB(data_GDPPerCapita, "GDP.PC")
data_GiniCoefficient_tidy <- clean_WB(data_GiniCoefficient, "Gini.Coefficient")
data_Unemployment_tidy <- clean_WB(data_Unemployment, "Unemployment")
data_ServicesEmployment_tidy <- clean_WB(data_ServicesEmployment, "Services.Employment")
# Sector tables; these four are combined into data_Sectors below.
data_Agriculture_tidy <- clean_WB(data_Agriculture, "Agri")
data_Manufacturing_tidy <- clean_WB(data_Manufacturing, "Manu")
data_Industry_tidy <- clean_WB(data_Industry, "Indus")
data_Services_tidy <- clean_WB(data_Services, "Serv")
# Build one wide table of sector values per country: start from agriculture
# and full-join manufacturing, industry and services on the country name
# (their Country.Code column is dropped so it is not duplicated), then treat
# a missing sector value as 0.
data_Sectors <- reduce(
  list(data_Manufacturing_tidy, data_Industry_tidy, data_Services_tidy),
  function(joined, sector_tbl) {
    full_join(joined, select(sector_tbl, -Country.Code), by = "Country.Name")
  },
  .init = data_Agriculture_tidy
) %>%
  mutate(across(c(Agri, Manu, Indus, Serv), ~ replace_na(.x, 0)))
# Singapore's sector values as a plain vector ordered Agri, Manu, Indus, Serv.
sectors_SG <- data_Sectors %>%
  filter(Country.Name == "Singapore") %>%
  select(Agri, Manu, Indus, Serv) %>%
  as.matrix() %>%
  t() %>%
  as.vector()

# Sector values of every country as a numeric matrix (one row per country).
sectors_all <- data_Sectors %>%
  select(Agri, Manu, Indus, Serv) %>%
  data.matrix()

# Dot product of each country's sector profile with Singapore's profile.
sectors_similarities <- sectors_all %*% sectors_SG

# Attach the square root of that dot product as the similarity score.
data_Sectors_tidy <- cbind(data_Sectors, Sector.Similarity = sectors_similarities ^ 0.5)
# Labels of aggregate (non-country) reference areas: regional groupings,
# income-band splits, blocs such as G20 / BRICS, and world totals. Used below
# to exclude these rows from data_WelfareCoverage via ref_area.label.
nonCountryDatepoints2 <- c("APEC", "ASEAN",
"Africa",
"Africa: Low income",
"Africa: Lower-middle income",
"Africa: Upper-middle income",
"Americas",
"Americas: High income",
"Americas: Lower-middle income",
"Americas: Upper-middle income",
"Arab League",
"Arab States",
"Arab States: High income",
"Arab States: Low income",
"Arab States: Lower-middle income",
"Arab States: Upper-middle income",
"Asia and the Pacific",
"Asia and the Pacific: High income",
"Asia and the Pacific: Low income",
"Asia and the Pacific: Lower-middle income",
"Asia and the Pacific: Upper-middle income",
"BRICS", "CARICOM", "Caribbean",
"Central Africa",
"Central America",
"Central Asia",
"Central and Western Asia",
"Central and Western Asia: High income",
"Central and Western Asia: Lower-middle income",
"Central and Western Asia: Upper-middle income",
"Eastern Africa",
"Eastern Asia",
"Eastern Asia: High income",
"Eastern Europe",
"Eastern Europe: High income",
"Eastern Europe: Upper-middle income",
"Europe and Central Asia",
"Europe and Central Asia: High income",
"Europe and Central Asia: Lower-middle income",
"Europe and Central Asia: Upper-middle income",
"European Union 27", "European Union 28",
"G20", "G7",
"Latin America and the Caribbean",
"Latin America and the Caribbean: High income",
"Latin America and the Caribbean: Lower-middle income",
"Latin America and the Caribbean: Upper-middle income",
"MENA",
"Northern Africa",
"Northern Africa: Lower-middle income",
"Northern America",
"Northern America: High income",
"Northern Europe",
"Northern, Southern and Western Europe",
"Northern, Southern and Western Europe: High income",
"Northern, Southern and Western Europe: Upper-middle income",
"Pacific Islands",
"South America",
"South-Eastern Asia",
"South-Eastern Asia and the Pacific",
"South-Eastern Asia and the Pacific: High income",
"South-Eastern Asia and the Pacific: Lower-middle income",
"South-Eastern Asia and the Pacific: Upper-middle income",
"Southern Africa",
"Southern Asia",
"Southern Asia: Lower-middle income",
"Southern Europe",
"Sub-Saharan Africa",
"Sub-Saharan Africa: Low income",
"Sub-Saharan Africa: Lower-middle income",
"Sub-Saharan Africa: Upper-middle income",
"Western Africa",
"Western Asia",
"Western Europe",
"World",
"World excluding BRICS",
"World excluding India and China",
"World: High income",
"World: Low income",
"World: Lower-middle income",
"World: Lower-middle income excluding India",
"World: Upper-middle income",
"World: Upper-middle income excluding China")
# Lookup from the long-form country labels used in the welfare-coverage and
# gender-pay-gap tables (names) to the World Bank country names used as join
# keys elsewhere in this script (values).
long_to_wb_country_names <- c(
  "Côte d'Ivoire" = "Cote d'Ivoire",
  "Bahamas" = "Bahamas, The",
  "Congo, Democratic Republic of the" = "Congo, Dem. Rep.",
  "Egypt" = "Egypt, Arab Rep.",
  "Gambia" = "Gambia, The",
  "Kyrgyzstan" = "Kyrgyz Republic",
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Curaçao" = "Curacao",
  "Venezuela (Bolivarian Republic of)" = "Venezuela, RB",
  "United States of America" = "United States",
  "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
  "Türkiye" = "Turkiye",
  "Occupied Palestinian Territory" = "West Bank and Gaza",
  "Macao, China" = "Macao SAR, China",
  "Tanzania, United Republic of" = "Tanzania",
  "Micronesia (Federated States of)" = "Micronesia, Fed. Sts.",
  "Republic of Moldova" = "Moldova",
  "Republic of Korea" = "Korea, Rep.",
  "Lao People's Democratic Republic" = "Lao PDR",
  "Iran (Islamic Republic of)" = "Iran, Islamic Rep.",
  "Hong Kong, China" = "Hong Kong SAR, China",
  "Saint Kitts and Nevis" = "St. Kitts and Nevis",
  "Saint Vincent and the Grenadines" = "St. Vincent and the Grenadines",
  "Saint Lucia" = "St. Lucia",
  "Slovakia" = "Slovak Republic",
  "Yemen" = "Yemen, Rep.",
  "United States Virgin Islands" = "Virgin Islands (U.S.)"
)

# Translate a vector of long-form country labels to World Bank names.
# Labels without an entry in the lookup are returned unchanged.
long_to_wb_country <- function(labels) {
  recode(labels, !!!long_to_wb_country_names)
}

# Mean welfare coverage per country since 2020: keep real countries only
# (aggregates are listed in nonCountryDatepoints2), both sexes combined, and
# the "covered by at least one social protection benefit" series.
WC_temp <- data_WelfareCoverage %>%
  filter(!ref_area.label %in% nonCountryDatepoints2,
         sex.label == "Total",
         classif1.label == "Contingency: Population covered by at least one social protection benefit",
         time >= 2020) %>%
  group_by(ref_area.label) %>%
  summarise(Welfare.Coverage = mean(obs_value)) %>%
  rename(country = ref_area.label) %>%
  mutate(country = long_to_wb_country(country))

# Jersey and Guernsey are merged into a single "Channel Islands" row as a
# population-weighted average. Both populations are from World Population
# Review. The original weight for Guernsey (6447) had lost a digit: the
# divisor 168466 equals 103989 + 64477, so the weights are kept in one named
# vector and weighted.mean() derives the divisor from them.
channel_islands_pop <- c(Jersey = 103989, Guernsey = 64477)
channel_islands_coverage <-
  WC_temp$Welfare.Coverage[match(names(channel_islands_pop), WC_temp$country)]

# If either island is absent from WC_temp the combined value is NA rather
# than an error from add_row().
data_WelfareCoverage_tidy <- WC_temp %>%
  add_row(country = "Channel Islands",
          Welfare.Coverage = weighted.mean(channel_islands_coverage,
                                           channel_islands_pop)) %>%
  filter(!country %in% names(channel_islands_pop))

# Mean gender pay gap per country since 2020 for the "_T" occupation code
# (presumably the all-occupations total -- confirm against the data source).
data_GenderPayGap_tidy <- data_GenderPayGap %>%
  filter(occupation == "_T",
         time_period >= 2020) %>%
  group_by(ref_area_desc) %>%
  summarise(Pay.Gap = mean(obs_value)) %>%
  rename(country = ref_area_desc) %>%
  mutate(country = long_to_wb_country(country))
# Lookup from the short country names used in the gender-equality and
# parental-leave tables (names) to the World Bank country names used as join
# keys elsewhere in this script (values).
short_to_wb_country_names <- c(
  "DR Congo" = "Congo, Dem. Rep.",
  "Republic of the Congo" = "Congo, Rep.",
  "Egypt" = "Egypt, Arab Rep.",
  "Gambia" = "Gambia, The",
  "Syria" = "Syrian Arab Republic",
  "Palestine" = "West Bank and Gaza",
  "Ivory Coast" = "Cote d'Ivoire",
  "Kyrgyzstan" = "Kyrgyz Republic",
  "Cape Verde" = "Cabo Verde",
  "Laos" = "Lao PDR",
  "Iran" = "Iran, Islamic Rep.",
  "Bahamas" = "Bahamas, The",
  "Brunei" = "Brunei Darussalam",
  "South Korea" = "Korea, Rep.",
  "Vietnam" = "Viet Nam",
  "Saint Kitts and Nevis" = "St. Kitts and Nevis",
  "Sint Maarten" = "Sint Maarten (Dutch part)",
  "Turkey" = "Turkiye",
  "Czech Republic" = "Czechia",
  "Macau" = "Macao SAR, China",
  "Russia" = "Russian Federation",
  "Hong Kong" = "Hong Kong SAR, China",
  "Slovakia" = "Slovak Republic",
  "United States Virgin Islands" = "Virgin Islands (U.S.)",
  "Saint Vincent and the Grenadines" = "St. Vincent and the Grenadines",
  "Saint Lucia" = "St. Lucia",
  "Micronesia" = "Micronesia, Fed. Sts.",
  "North Korea" = "Korea, Dem. People's Rep.",
  "Venezuela" = "Venezuela, RB",
  "Yemen" = "Yemen, Rep.",
  "Saint Martin" = "St. Martin (French part)"
)

# Translate a vector of short country names to World Bank names.
# Names without an entry in the lookup are returned unchanged.
short_to_wb_country <- function(labels) {
  recode(labels, !!!short_to_wb_country_names)
}

# Gender equality score (2024 column) per country, keyed by World Bank name.
data_GenderEquality_tidy <- data_GenderEquality %>%
  select(flagCode, country, GenderEquality_GlobalEqualityScore_score_2024) %>%
  mutate(country = short_to_wb_country(country))

# Maternity leave per country, converted from weeks to days (weeks * 7).
data_MaternityLeave_tidy <- data_MaternityLeave %>%
  transmute(
    country = short_to_wb_country(country),
    Maternity.Leave.Days = MaternityLeave_LengthInWeeks_numOfWeeks_YearFree * 7
  )

# Paternity leave per country; the source column is renamed to
# Paternity.Leave.Days without any unit conversion.
data_PaternityLeave_tidy <- data_PaternityLeave %>%
  select(country,
         Paternity.Leave.Days = PaternityLeave_DaysOfPaternityLeave_num_YearFree) %>%
  mutate(country = short_to_wb_country(country))
# Mean expenditure value per reference area since 2020, stored in a column
# named `value_col`. The "OECD" aggregate is excluded, the country name is
# looked up via the country code in data_Population_tidy, and rows with any
# missing value (including areas without a matching country) are dropped.
summarise_expenditure_since_2020 <- function(expenditure_tbl, value_col) {
  country_lookup <- select(data_Population_tidy, Country.Name, Country.Code)
  expenditure_tbl %>%
    filter(REF_AREA != "OECD",
           TIME_PERIOD >= 2020) %>%
    group_by(REF_AREA) %>%
    summarise("{value_col}" := mean(OBS_VALUE)) %>%
    left_join(country_lookup, by = c("REF_AREA" = "Country.Code")) %>%
    drop_na()
}

# NOTE: the "Expeniture" spelling is kept because these column names are part
# of the tables other code may read.
data_ExpenditureonFamily_tidy <-
  summarise_expenditure_since_2020(data_ExpenditureonFamily, "Expeniture.on.Family")
data_ExpenditureonIncapacity_tidy <-
  summarise_expenditure_since_2020(data_ExpenditureonIncapacity, "Expeniture.on.Incapacity")

# Total social expenditure = family + incapacity expenditure, with a missing
# component counted as 0. Only countries present in the family table are kept.
data_SocialExpenditure_tidy <- data_ExpenditureonFamily_tidy %>%
  left_join(select(data_ExpenditureonIncapacity_tidy, -Country.Name),
            by = "REF_AREA") %>%
  rename(Country.Code = REF_AREA) %>%
  mutate(Social.Expenditure = replace_na(Expeniture.on.Incapacity, 0) +
           replace_na(Expeniture.on.Family, 0)) %>%
  select(-Expeniture.on.Family, -Expeniture.on.Incapacity)
Only 42 countries are represented in these datasets (the second one has only 38 after removing NAs and countries without any readings since 2020). We therefore give up on these tables completely: we already have the welfare coverage, and expenditure on family may actually be more related to how important family is in a society; if that society follows traditional gender roles within the family, it will distort the measure.
We want to choose the countries with which we can predict the effect
of different policies on Singapore. Hence, we try to identify factors
which affect the effect of possible policies to boost Female LFPR.
Policies would broadly be either financial (increasing the returns for
females to work, decreasing the opportunity cost of hiring females) or
non-financial (decreasing the opportunity cost for females to work,
increasing the opportunity cost of not hiring females). Below, I will
list out the data we are using and how it is relevant to these
factors.
Demographic:
- Female.LFPR: signals a starting point. It can suggest how accepted female employment is at large (if we want to go quirky, we can fit quick LMs and see which variables are the most correlated?).
- Population: countries with large and small populations engage in different kinds of policies and face different bureaucratic burdens.
- Fertility.Rate: Singapore is struggling with its ageing population. Policies which target female LFPR must work in the context of the ageing workforce.
- Tertiary.Education: suggests the kinds of jobs that are available to potential female workers, which may then affect the policies that are suited for boosting employment in those sectors.
Economic:
- GDP.PC: generally, female LFPR does depend on the income level of a country. In very low income countries, female LFPR is high because it is necessary for females to work. Hence, female LFPR is unlikely to be increased through policy. In middle income countries, stigma around married women working is particularly high, as it signals the inaptitude of the man to make an adequate living.
- Unemployment: economic capacity for more women to enter the workforce.
Cultural:
- Welfare.Coverage: whether people are used to welfare policies and a generally interventionist government.
- Maternity.Leave.Total.W: suggests the degree of family care expected of female workers, but also …
- Gender.Equality: generally, higher gender equality means greater respect for women’s rights and abilities (not always, but usually). Hence, it should come with an increased acceptance for female employment.
income_group <- read.csv("../data/metadata/Population/Metadata_Country_API_SP.POP.TOTL_DS2_en_csv_v2_2590.csv") %>%
select(Country.Code, Region, IncomeGroup)
# Assemble the analysis table: one row per country, one column per indicator,
# joined on the World Bank country name, with every indicator standardised
# (centred and divided by its standard deviation) at the end.
# `data_TertiaryEducation_tiday` is spelled as it is used in the original
# script; the object is defined outside this section.
full_data <- data_FemaleLFPR_tidy %>%
  full_join(data_Population_tidy %>% select(-Country.Code), by = c("Country.Name")) %>%
  full_join(data_FertilityRate_tidy %>% select(-Country.Code), by = c("Country.Name")) %>%
  full_join(data_TertiaryEducation_tiday %>% select(-Country.Code), by = c("Country.Name")) %>%
  full_join(data_GDPPerCapita_tidy %>% select(-Country.Code), by = c("Country.Name")) %>%
  left_join(income_group, by = c("Country.Code")) %>%
  # Encode the income group as 1 (low) .. 4 (high); unknown groups become NA.
  mutate(IncomeGroup = factor(IncomeGroup,
                              levels = c("Low income", "Lower middle income",
                                         "Upper middle income", "High income"),
                              labels = c(1, 2, 3, 4),
                              ordered = TRUE)) %>%
  mutate(IncomeGroup = as.numeric(IncomeGroup)) %>%
  # full_join(data_Sectors_tidy %>% select(-Country.Code), by = c("Country.Name")) %>%
  # select(-Agri, -Manu, -Indus, -Sector.Similarity) %>%
  full_join(data_Unemployment_tidy %>% select(-Country.Code), by = c("Country.Name")) %>%
  full_join(data_ServicesEmployment_tidy %>% select(-Country.Code), by = c("Country.Name")) %>%
  full_join(data_WelfareCoverage_tidy, by = c("Country.Name" = "country")) %>%
  full_join(data_MaternityLeave_tidy, by = c("Country.Name" = "country")) %>%
  full_join(data_PaternityLeave_tidy, by = c("Country.Name" = "country")) %>%
  # A country missing from a leave table is treated as offering 0 days.
  replace_na(list(Maternity.Leave.Days = 0,
                  Paternity.Leave.Days = 0)) %>%
  mutate(Maternity.Paternity.Leave.Diff = Maternity.Leave.Days - Paternity.Leave.Days) %>%
  select(-Maternity.Leave.Days, -Paternity.Leave.Days) %>%
  full_join(data_GenderEquality_tidy %>% select(-flagCode), by = c("Country.Name" = "country")) %>%
  rename(Gender.Equality = GenderEquality_GlobalEqualityScore_score_2024) %>%
  relocate(Country.Code, .after = Country.Name) %>%
  relocate(Region, .after = Country.Code) %>%
  # scale() returns a one-column matrix; as.numeric() keeps the standardised
  # values but stores them as ordinary numeric vectors instead of matrix
  # columns (which print as `<dbl[,1]>`).
  mutate(across(Female.LFPR:Gender.Equality, function(x) as.numeric(scale(x))))
# Preview the first rows of the assembled, standardised table.
head(full_data)
## # A tibble: 6 × 14
## Country.Name Country.Code Region Female.LFPR[,1] Population[,1]
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Afghanistan AFG South Asia -2.95 0.0310
## 2 Albania ALB Europe & Central Asia 0.182 -0.241
## 3 Algeria DZA Middle East & North … -2.38 0.0642
## 4 Angola AGO Sub-Saharan Africa 1.45 -0.00207
## 5 Argentina ARG Latin America & Cari… 0.145 0.0598
## 6 Armenia ARM Europe & Central Asia 0.370 -0.240
## # ℹ 9 more variables: Fertility.Rate <dbl[,1]>, Tertiary.Education <dbl[,1]>,
## # GDP.PC <dbl[,1]>, IncomeGroup <dbl[,1]>, Unemployment <dbl[,1]>,
## # Services.Employment <dbl[,1]>, Welfare.Coverage <dbl[,1]>,
## # Maternity.Paternity.Leave.Diff <dbl[,1]>, Gender.Equality <dbl[,1]>
# Pearson correlation matrix of all indicators except IncomeGroup, computed
# on countries with no missing indicator and rounded to 2 decimals.
cormat <- full_data %>%
  select(Female.LFPR:Gender.Equality, -IncomeGroup) %>%
  drop_na() %>%
  cor() %>%
  round(2)

# Long format of the full matrix (one row per variable pair).
melted_cormat_full <- melt(cormat)
# Return `cormat` with every entry above the diagonal set to NA, keeping the
# lower triangle and the diagonal.
get_lower_tri <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Return `cormat` with every entry below the diagonal set to NA, keeping the
# upper triangle and the diagonal.
get_upper_tri <- function(cormat) {
  replace(cormat, lower.tri(cormat), NA)
}
# Keep only the upper triangle of the correlation matrix and reshape it to
# long format (Var1, Var2, value) for plotting.
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)

# Heatmap of pairwise Pearson correlations with the value printed in each tile.
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "dodgerblue", high = "tomato", mid = "white",
                       midpoint = 0, limit = c(-1,1), space = "Lab",
                       name="Pearson\nCorrelation") +
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 2.5) +
  labs(title = "Correlation between variables") +
  theme_minimal() +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 10, colour = "black"),
        axis.text.y = element_text(size = 10, colour = "black"),
        panel.grid.major = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        # A numeric legend.position is deprecated since ggplot2 3.5.0; the
        # coordinates now go in legend.position.inside.
        legend.position = "inside",
        legend.position.inside = c(.55, .95),
        legend.justification = c("right", "top"),
        legend.direction = "horizontal") +
  guides(fill = guide_colorbar(barwidth = 6, barheight = 1,
                               title.position = "top", title.hjust = 0.5)) +
  coord_fixed()
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# NOTE(review): par() configures base graphics; it presumably has no effect on
# the ggplot2 figure below -- confirm before removing.
par(mfrow=c(3, 3))

# One row per (country, measure) so each measure can be drawn in its own facet.
pivoted_table <- full_data %>%
  select(-IncomeGroup) %>%
  pivot_longer(cols = Population:Gender.Equality,
               names_to = "Measure",
               values_to = "x")

# Female LFPR against every other indicator, one panel per indicator, points
# coloured by region and labelled with the country code.
ggplot(pivoted_table, aes(x = x, y = Female.LFPR)) +
  geom_point(aes(colour = Region), size = 0.5, alpha = 0.7) +
  geom_text_repel(aes(label = Country.Code), colour = "black", size=2) +
  scale_color_brewer(palette = "Set1") +
  facet_wrap(vars(Measure)) +
  theme_minimal() +
  theme(plot.background = element_rect(colour = "black", linewidth = 1))
## Warning: Removed 464 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 464 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 184 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 183 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 135 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 176 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 185 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 184 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 145 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 186 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 164 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Countries with a value for every indicator (IncomeGroup excluded). The
# identifier columns are kept so rows can be matched back to countries, but
# only the indicator columns decide which rows are complete.
mdist_rows <- full_data %>%
  select(-IncomeGroup) %>%
  drop_na(-Country.Name, -Country.Code, -Region)

# Indicator matrix (one row per country) and its covariance matrix.
data_mat <- data.matrix(mdist_rows %>% select(-Country.Name, -Country.Code, -Region))
cov_mat <- var(data_mat)

# Singapore's indicator values are the reference point for both distances.
singapore_row <- which(mdist_rows$Country.Name == "Singapore")
stopifnot(length(singapore_row) == 1L)
centre <- data_mat[singapore_row, , drop = FALSE]

# stats::mahalanobis() returns the *squared* Mahalanobis distance of each row
# from `centre`; inverted = FALSE means cov_mat is the covariance itself.
m_distances <- mahalanobis(data_mat, centre, cov_mat, inverted = FALSE)

# Euclidean distance of every country to Singapore. The previous code indexed
# the `dist` object (length n * (n - 1) / 2) with a length-n logical mask,
# which does not select Singapore's distances.
eucl_distances <- unname(sqrt(rowSums(sweep(data_mat, 2L, as.vector(centre))^2)))
m_distances
## [1] 27.620240 29.701351 31.542062 37.444222 26.269301 15.896329 10.146802
## [8] 22.640264 13.004247 25.437918 14.364143 29.433331 33.459308 29.334119
## [15] 22.272403 41.015953 28.095130 9.367683 43.913156 26.930948 33.898638
## [22] 29.567077 25.214967 13.964853 33.283812 26.142470 72.883900 25.683435
## [29] 32.799695 24.285367 19.920807 20.386821 16.227415 10.969439 25.168609
## [36] 28.721271 27.102218 31.640234 18.147954 25.526976 22.814734 16.121994
## [43] 26.835732 14.845850 24.365300 47.650874 27.865065 22.689982 19.491508
## [50] 32.867749 74.131996 23.280159 36.322091 5.116855 23.550754 13.286236
## [57] 21.608483 40.968659 25.320646 18.286335 22.908154 27.929269 27.730708
## [64] 21.325499 17.345018 16.886056 29.392207 20.306932 27.349885 14.966199
## [71] 25.317955 28.401142 31.932869 27.751103 24.953389 27.368575 34.779321
## [78] 32.530192 11.295741 22.231831 39.331100 22.934466 27.803792 18.221852
## [85] 25.692535 16.909264 35.692245 17.939227 20.959382 16.924832 16.445287
## [92] 27.828524 16.281439 32.305872 24.969094 0.000000 25.294531 16.395095
## [99] 56.113647 20.392075 25.055230 30.663561 14.993569 7.945078 25.689453
## [106] 28.596239 26.863530 29.998329 30.939925 26.468576 26.388715 19.448250
## [113] 24.663554 11.084884 23.941565 29.952899 27.098614 25.102389
# Attach the distances to the rows they were computed from. m_distances was
# built from the rows that are complete in the indicator columns (IncomeGroup
# and the identifier columns excluded), so exactly the same columns are
# inspected here; a plain drop_na() would also drop rows with a missing
# IncomeGroup and could misalign the two.
full_data_with_d <- cbind(
  full_data %>% drop_na(-Country.Name, -Country.Code, -Region, -IncomeGroup),
  m_distances
)

# Dot plot of every country ordered by distance from Singapore, with a dashed
# reference line at the Netherlands.
# NOTE(review): the x aesthetic is scale(m_distances), i.e. standardised
# values, while the axis label says "Mahalanobis distance" -- confirm intent.
ggplot(full_data_with_d %>% arrange(m_distances), aes(y = reorder(Country.Name, -m_distances, sum))) +
  geom_hline(yintercept = "Netherlands", lty = 2, alpha = 0.8) +
  geom_point(aes(x = scale(m_distances)), color = "deepskyblue") +
  labs(title = "Mahalanobis Similarity to Singapore",
       x = "Mahalanobis distance", y = "Country") +
  theme_minimal()
# The 20 countries closest to Singapore (smallest m_distances), ordered by
# distance, with a dashed reference line at the Netherlands.
ggplot(
  slice_min(full_data_with_d, m_distances, n = 20),
  aes(x = m_distances)
) +
  geom_hline(yintercept = "Netherlands", lty = 2, alpha = 0.8) +
  geom_point(
    aes(y = reorder(Country.Name, -m_distances, sum)),
    color = "dodgerblue",
    size = 2
  ) +
  labs(
    title = "Similarity to Singapore (Top 20)",
    subtitle = "Mahalanobis distance from Singapore",
    x = "Mahalanobis distance",
    y = "Country"
  ) +
  theme_minimal()
# Append each country's distance from a reference country to `df`.
#
# df:        data frame with Country.Name, Country.Code and Region columns;
#            every other column is treated as a numeric indicator.
# reference: Country.Name of the reference country (default "Singapore").
#
# Rows with any missing value are dropped first. Returns the remaining rows
# with an extra `m_dist` column holding the value of stats::mahalanobis()
# (the squared Mahalanobis distance) relative to the reference row, using the
# covariance of the indicators over the remaining rows.
# Stops with an error unless exactly one complete row matches `reference`.
create_mdist_df <- function(df, reference = "Singapore") {
  complete_rows <- df %>% drop_na()
  data_mat <- data.matrix(
    complete_rows %>% select(-Country.Name, -Country.Code, -Region)
  )

  ref_idx <- which(complete_rows$Country.Name == reference)
  if (length(ref_idx) != 1L) {
    stop("Expected exactly one complete row for '", reference,
         "', found ", length(ref_idx), call. = FALSE)
  }

  m_dist <- mahalanobis(data_mat, data_mat[ref_idx, ], var(data_mat),
                        inverted = FALSE)
  cbind(complete_rows, m_dist)
}
# Shared dot plot for the three plot_mdist_data*() functions: countries on the
# y axis ordered by m_dist, standardised m_dist on the x axis, and a dashed
# horizontal line at the country named by `cutoff`.
mdist_dotplot <- function(plot_data, cutoff, title, subtitle = waiver()) {
  ggplot(plot_data, aes(y = reorder(Country.Name, -m_dist, sum))) +
    geom_hline(yintercept = cutoff, lty = 2, alpha = 0.8) +
    geom_point(aes(x = scale(m_dist)), color = "dodgerblue", size = 2) +
    labs(title = title, subtitle = subtitle,
         x = "Mahalanobis distance", y = "Country") +
    theme_minimal()
}

# Plot every country in `df_with_mdist` (output of create_mdist_df()).
# `cutoff` is the Country.Name at which the dashed line is drawn.
plot_mdist_data <- function(df_with_mdist, cutoff) {
  mdist_dotplot(df_with_mdist, cutoff,
                title = "Mahalanobis Similarity to Singapore")
}

# Plot only the 20 rows with the smallest m_dist.
plot_mdist_data_top_20 <- function(df_with_mdist, cutoff) {
  mdist_dotplot(df_with_mdist %>% slice_min(m_dist, n = 20), cutoff,
                title = "Similarity to Singapore (Top 20)",
                subtitle = "Mahalanobis distance from Singapore")
}

# Plot only the 20 rows with the largest m_dist.
plot_mdist_data_last_20 <- function(df_with_mdist, cutoff) {
  mdist_dotplot(df_with_mdist %>% slice_max(m_dist, n = 20), cutoff,
                title = "Similarity to Singapore (Bottom 20)",
                subtitle = "Mahalanobis distance from Singapore")
}
# Global comparison on a reduced indicator list (no Population, Welfare
# Coverage or leave difference); countries with any missing value are dropped.
df_2 <- full_data %>%
  select(all_of(c(
    "Country.Name", "Country.Code", "Region",
    "Female.LFPR",
    "Fertility.Rate", "Tertiary.Education",
    "GDP.PC", "Unemployment", "Services.Employment",
    "Gender.Equality"
  ))) %>%
  drop_na()
df_2_mdist <- create_mdist_df(df_2)

# Full ranking, 20 closest and 20 furthest, each with the dashed line drawn at
# the United Arab Emirates.
plot_mdist_data(df_2_mdist, cutoff = "United Arab Emirates")
plot_mdist_data_top_20(df_2_mdist, cutoff = "United Arab Emirates")
plot_mdist_data_last_20(df_2_mdist, cutoff = "United Arab Emirates")
# Female LFPR against GDP per capita (both standardised), coloured by the
# gender equality score, with a loess trend line and country-code labels.
# Gender.Equality is converted back from its standardised form using the mean
# and standard deviation of the raw score column. na.rm = TRUE matches
# scale(), which ignores missing values when standardising.
# NOTE(review): this assumes the raw score column has the same values that
# were standardised inside full_data -- confirm the joins add no duplicates.
ggplot(full_data %>%
         mutate(Gender.Equality = Gender.Equality *
                  sd(data_GenderEquality_tidy %>%
                       pull(GenderEquality_GlobalEqualityScore_score_2024),
                     na.rm = TRUE) +
                  mean(data_GenderEquality_tidy %>%
                         pull(GenderEquality_GlobalEqualityScore_score_2024),
                       na.rm = TRUE)) %>%
         drop_na(),
       aes(x = GDP.PC, y = Female.LFPR,
           colour = Gender.Equality)) +
  geom_point(size=2, alpha = 0.8) +
  scale_color_gradient2(low="black", mid="deepskyblue", high="skyblue", midpoint=0.75,
                        name="Gender Equality Index", space = "Lab") +
  geom_smooth(colour="tomato2", se=FALSE, method="loess", formula = 'y ~ x') +
  geom_text_repel(aes(label = Country.Code), colour = "black", size=2, max.overlaps = 20) +
  labs(title="Relationship Between Female Labour Force Participation Rate and GDP per Capita",
       subtitle="and state of gender equality",
       x="GDP per Capita", y="FLFP Rate") +
  theme_minimal() +
  theme(axis.text = element_blank(),
        # A numeric legend.position is deprecated since ggplot2 3.5.0; the
        # coordinates now go in legend.position.inside.
        legend.position = "inside",
        legend.position.inside = c(.9, .3),
        legend.justification = c("right", "top"),
        legend.direction = "horizontal") +
  guides(colour = guide_colorbar(barwidth = 10, barheight = 1,
                                 title.position = "top", title.hjust = 0.5))
# Regional comparison on the full indicator list (IncomeGroup excluded):
# complete rows only, restricted to East Asia & Pacific.
df_3 <- full_data %>%
  select(-IncomeGroup) %>%
  drop_na() %>%
  filter(Region == "East Asia & Pacific")
df_3_mdist <- create_mdist_df(df_3)

# Full ranking, 20 closest and 20 furthest, with the dashed line at Japan.
plot_mdist_data(df_3_mdist, cutoff = "Japan")
plot_mdist_data_top_20(df_3_mdist, cutoff = "Japan")
plot_mdist_data_last_20(df_3_mdist, cutoff = "Japan")
# Regional comparison on the reduced indicator list: East Asia & Pacific
# countries with no missing value among the selected columns.
df_4 <- full_data %>%
  filter(Region == "East Asia & Pacific") %>%
  select(all_of(c(
    "Country.Name", "Country.Code", "Region",
    "Female.LFPR",
    "Fertility.Rate", "Tertiary.Education",
    "GDP.PC", "Unemployment", "Services.Employment",
    "Gender.Equality"
  ))) %>%
  drop_na()
df_4_mdist <- create_mdist_df(df_4)

# Full ranking and 20 closest, with the dashed line at Malaysia.
plot_mdist_data(df_4_mdist, cutoff = "Malaysia")
plot_mdist_data_top_20(df_4_mdist, cutoff = "Malaysia")
# Comparison countries picked from each of the four rankings above:
# chosen_1 = global / full indicator list, chosen_2 = global / reduced list,
# chosen_3 = regional / full list, chosen_4 = regional / reduced list.
chosen_1 <- c("Ireland", "Switzerland", "Brunei Darussalam", "Austria", "Denmark", "United States", "Netherlands")
chosen_2 <- c("Ireland", "Qatar", "Switzerland", "United States", "Brunei Darussalam", "United Arab Emirates")
chosen_3 <- c("Indonesia", "Australia", "Viet Nam", "Lao PDR", "Thailand", "Japan")
# "Indonesia" was listed twice; duplicated entries make
# factor(Country.Name, chosen_4) fail with a duplicated-level error.
chosen_4 <- c("Australia", "Korea, Rep.", "Thailand", "Indonesia", "Viet Nam", "Brunei Darussalam", "Lao PDR", "Japan", "Malaysia")
chosen_combined <- unique(c(chosen_1, chosen_2, chosen_3, chosen_4))

# Show Singapore's standardised indicator values.
full_data %>% filter(Country.Name == "Singapore")
## # A tibble: 1 × 14
## Country.Name Country.Code Region Female.LFPR[,1] Population[,1]
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Singapore SGP East Asia & Pacific 0.754 -0.219
## # ℹ 9 more variables: Fertility.Rate <dbl[,1]>, Tertiary.Education <dbl[,1]>,
## # GDP.PC <dbl[,1]>, IncomeGroup <dbl[,1]>, Unemployment <dbl[,1]>,
## # Services.Employment <dbl[,1]>, Welfare.Coverage <dbl[,1]>,
## # Maternity.Paternity.Leave.Diff <dbl[,1]>, Gender.Equality <dbl[,1]>
# Absolute difference between each country and Singapore on every
# standardised indicator, in long format (one row per country and variable).
# Singapore's values are read from its own row instead of being copied by
# hand from printed output, so they cannot go stale when the data changes.
# If Singapore is absent from full_data_with_d, every Value is NA.
data_chosen_countries <- full_data_with_d %>%
  arrange(m_distances) %>%
  select(-IncomeGroup, -Country.Code, -Region) %>%
  mutate(across(Female.LFPR:Gender.Equality,
                ~ abs(.x - .x[match("Singapore", Country.Name)]))) %>%
  pivot_longer(Female.LFPR:Gender.Equality, names_to = "Variable", values_to = "Value")
## Global expanded
# Heatmap (global, expanded variable list): per-variable absolute difference
# from Singapore for the chosen_1 comparators together with the 20 countries
# that have the largest m_distances.
data_chosen_countries %>%
  filter(
    Country.Name %in% c(
      chosen_1,
      full_data_with_d %>%
        slice_max(m_distances, n = 20) %>%
        pull(Country.Name)
    )
  ) %>%
  # Order the x axis by increasing m_distances.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = full_data_with_d %>%
        arrange(m_distances) %>%
        pull(Country.Name)
    )
  ) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4.75
  ) +
  labs(
    title = "Difference Between Countries and Singapore (Global)",
    subtitle = "on each variable from expanded list",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Heatmap (global, expanded variable list) restricted to the chosen_1
# comparators, shown on the x axis in the order they are listed in chosen_1.
data_chosen_countries %>%
  filter(Country.Name %in% chosen_1) %>%
  mutate(Country.Name = factor(Country.Name, levels = chosen_1)) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4
  ) +
  labs(
    title = "Difference Between Selected \nCountries and Singapore (Global)",
    subtitle = "on each variable from expanded list",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Heatmap (global, expanded variable list) for the 20 countries with the
# largest m_distances, ordered from the largest distance downwards.
data_chosen_countries %>%
  # Converting to a factor with only the furthest countries as levels turns
  # every other country into NA, which the next step removes.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = full_data_with_d %>%
        slice_max(m_distances, n = 20) %>%
        pull(Country.Name)
    )
  ) %>%
  filter(!is.na(Country.Name)) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4.75
  ) +
  labs(
    title = "Difference Between Furthest Countries and Singapore (Global)",
    subtitle = "on each variable from expanded list",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Global selected
# Heatmap for the chosen_2 comparators, in the order listed in chosen_2.
# Population, Welfare.Coverage and Maternity.Paternity.Leave.Diff are
# overwritten with Inf before plotting.
# NOTE(review): presumably this takes the three variables off the colour
# gradient because they are not part of the "selected" variable list; their
# tiles are still labelled "Inf" -- confirm that this is intended.
data_chosen_countries %>%
  filter(Country.Name %in% chosen_2) %>%
  mutate(
    Value = replace(
      Value,
      Variable %in% c(
        "Population", "Welfare.Coverage", "Maternity.Paternity.Leave.Diff"
      ),
      Inf
    ),
    Country.Name = factor(Country.Name, levels = chosen_2)
  ) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4
  ) +
  labs(
    title = "Difference to Singapore",
    x = "Country"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Heatmap for the 15 countries with the largest m_dist in df_2_mdist, ordered
# from the largest distance downwards.
#
# The country list is computed once and used both to filter the rows and to
# order the x axis. Previously the filter asked for n = 15 while the axis
# levels asked for n = 20, so the two could silently drift apart; the plotted
# countries were already limited to 15, so the figure itself is unchanged.
#
# Population, Welfare.Coverage and Maternity.Paternity.Leave.Diff are
# overwritten with Inf before plotting.
# NOTE(review): presumably this takes the three variables off the colour
# gradient because they are not part of the "selected" variable list; their
# tiles are still labelled "Inf" -- confirm that this is intended.
data_chosen_countries %>%
  # Countries outside the furthest-15 list become NA here and are dropped by
  # the following filter.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = df_2_mdist %>%
        slice_max(m_dist, n = 15) %>%
        pull(Country.Name)
    )
  ) %>%
  filter(!is.na(Country.Name)) %>%
  mutate(
    Value = replace(
      Value,
      Variable %in% c(
        "Population", "Welfare.Coverage", "Maternity.Paternity.Leave.Diff"
      ),
      Inf
    )
  ) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4
  ) +
  labs(
    title = "Difference to Singapore",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Regional expanded
# Heatmap (regional, expanded variable list) for every country present in
# df_3_mdist, ordered along the x axis by increasing m_dist.
data_chosen_countries %>%
  # Countries that are not in df_3_mdist become NA here and are dropped by
  # the following filter.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = df_3_mdist %>%
        arrange(m_dist) %>%
        pull(Country.Name)
    )
  ) %>%
  filter(!is.na(Country.Name)) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4.75
  ) +
  labs(
    title = "Difference Between Countries and Singapore (Regional)",
    subtitle = "on each variable from expanded list",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Heatmap (regional, expanded variable list) restricted to the chosen_3
# comparators, shown on the x axis in the order they are listed in chosen_3.
data_chosen_countries %>%
  filter(Country.Name %in% chosen_3) %>%
  mutate(Country.Name = factor(Country.Name, levels = chosen_3)) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4
  ) +
  labs(
    title = "Difference Between Selected\n Countries and Singapore (Regional)",
    subtitle = "on each variable from expanded list",
    x = "Country"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Heatmap (regional, expanded variable list) for the 10 countries with the
# largest m_dist in df_3_mdist, ordered from the largest distance downwards.
data_chosen_countries %>%
  # Countries outside the furthest-10 list become NA here and are dropped by
  # the following filter.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = df_3_mdist %>%
        slice_max(m_dist, n = 10) %>%
        pull(Country.Name)
    )
  ) %>%
  filter(!is.na(Country.Name)) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4
  ) +
  labs(
    title = "Difference Between Furthest Countries and Singapore (Regional)",
    subtitle = "on each variable from expanded list",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Regional selected
# Heatmap for the 5 countries with the smallest m_dist in df_4_mdist, ordered
# from the smallest distance upwards.
# Population, Welfare.Coverage and Maternity.Paternity.Leave.Diff are
# overwritten with Inf before plotting.
# NOTE(review): presumably this takes the three variables off the colour
# gradient because they are not part of the "selected" variable list; their
# tiles are still labelled "Inf" -- confirm that this is intended.
data_chosen_countries %>%
  # Countries outside the closest-5 list become NA here and are dropped by
  # the following filter.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = df_4_mdist %>%
        slice_min(m_dist, n = 5) %>%
        pull(Country.Name)
    )
  ) %>%
  filter(!is.na(Country.Name)) %>%
  mutate(
    Value = replace(
      Value,
      Variable %in% c(
        "Population", "Welfare.Coverage", "Maternity.Paternity.Leave.Diff"
      ),
      Inf
    )
  ) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 4
  ) +
  labs(
    title = "Difference Between Regional \nComparators and Singapore",
    subtitle = "on each variable",
    x = "Country"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Heatmap for the 10 countries with the largest m_dist in df_4_mdist, ordered
# from the largest distance downwards.
# Population, Welfare.Coverage and Maternity.Paternity.Leave.Diff are
# overwritten with Inf before plotting.
# NOTE(review): presumably this takes the three variables off the colour
# gradient because they are not part of the "selected" variable list; their
# tiles are still labelled "Inf" -- confirm that this is intended.
data_chosen_countries %>%
  # Countries outside the furthest-10 list become NA here and are dropped by
  # the following filter.
  mutate(
    Country.Name = factor(
      Country.Name,
      levels = df_4_mdist %>%
        slice_max(m_dist, n = 10) %>%
        pull(Country.Name)
    )
  ) %>%
  filter(!is.na(Country.Name)) %>%
  mutate(
    Value = replace(
      Value,
      Variable %in% c(
        "Population", "Welfare.Coverage", "Maternity.Paternity.Leave.Diff"
      ),
      Inf
    )
  ) %>%
  ggplot(aes(x = Country.Name, y = Variable)) +
  geom_tile(aes(fill = Value), color = "white") +
  geom_text(aes(label = round(Value, 2)), color = "black", size = 2.5) +
  scale_fill_gradient2(
    name = "Difference \n(scaled)",
    low = "tomato", mid = "white", high = "white",
    midpoint = 3
  ) +
  labs(
    title = "Difference to Singapore",
    x = "Country",
    y = "Variable"
  ) +
  coord_fixed() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.